#!/usr/bin/env python
# coding=utf-8
# __author__ = 'Yunchao Ling'

from pymongo import MongoClient
import sys


def formatLine(line):
    """Convert one comma-separated input row into a MongoDB-ready record.

    The row is split on "," and the fields are used as follows:

    * field 0 is ignored;
    * field 1 is parsed as the integer ``pmid``;
    * field 2 is the descriptor: its first and last characters are dropped
      (presumably the surrounding single quotes -- confirm against the
      export format) and only the text before the first "." is kept;
    * fields 3-5 are optional qualifiers, cleaned the same way as the
      descriptor; values equal to ``NULL`` or ``''`` are skipped.

    :param line: one row of the input file, without the trailing newline.
    :returns: ``{"pmid": int, "mesh": {"descriptor": str}}``; the ``mesh``
        dict also gets a ``"qualifier"`` list when at least one qualifier
        is present.
    :raises IndexError: if the row has fewer than three fields.
    :raises ValueError: if field 1 is not an integer.
    """
    fields = line.split(",")
    pmid = int(fields[1])
    descriptor = fields[2][1:-1].split(".")[0]

    # Strip the quotes *before* cutting at the dot so a qualifier without a
    # ".suffix" does not keep its closing quote.  Slicing (rather than
    # indexing 3..5) treats missing trailing columns as "no qualifier".
    qualifier = [
        field[1:-1].split(".")[0]
        for field in fields[3:6]
        if field != "NULL" and field != "''"
    ]

    mesh = {"descriptor": descriptor}
    if qualifier:
        mesh["qualifier"] = qualifier
    return {"pmid": pmid, "mesh": mesh}


if __name__ == "__main__":
    # Bulk-load the file named by the first command-line argument into the
    # MongoDB collection DB_NAME.COLLECTION_NAME, one document per input
    # line, sending the documents in batches of BATCH_SIZE.
    DB_NAME = "test2"
    COLLECTION_NAME = "mesh"
    BATCH_SIZE = 1000  # documents per insert_many() call

    if len(sys.argv) < 2:
        sys.exit("usage: %s <input_file>" % sys.argv[0])

    connection = MongoClient('10.188.188.101', 27017)
    try:
        collection = connection[DB_NAME][COLLECTION_NAME]

        records = []
        # `with` guarantees the input file is closed even if a line fails
        # to parse or an insert raises.
        with open(sys.argv[1], 'r') as infile:
            for line in infile:
                line = line.rstrip()
                if not line:
                    # A blank line (e.g. at end of file) carries no record
                    # and would otherwise make formatLine raise.
                    continue
                records.append(formatLine(line))
                if len(records) == BATCH_SIZE:
                    collection.insert_many(records)
                    records = []

        # Flush the final, partially filled batch.
        if records:
            collection.insert_many(records)
    finally:
        # Always release the connection, including on errors.
        connection.close()
